import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
# Append the directory given as the first command-line argument to the module
# search path. Raises IndexError when the script is run without an argument.
# NOTE(review): no import from that directory is visible in this part of the
# script — confirm whether the path entry is still needed.
sys.path.append(sys.argv[1])

import pandas as pd   
import pickle

# Read 'melb_data.csv' from the directory named by the first command-line
# argument.
melbourne_housing = pd.read_csv(os.path.join(sys.argv[1], 'melb_data.csv'))

# Keep the rows whose 'Price' lies in [300000, 600000], both ends inclusive.
affordable_housing = melbourne_housing.loc[
    melbourne_housing['Price'].between(300000, 600000)
]

print(affordable_housing)
# pickle.dump(affordable_housing,open("./ref_result/affordable_housing.pkl","wb"))


import pandas as pd   
import pickle

# Count the rows of the price-filtered frame.
number_of_properties = affordable_housing.shape[0]

print(number_of_properties)
# pickle.dump(number_of_properties,open("./ref_result/number_of_properties.pkl","wb"))


import pandas as pd   
import pickle

# Ten most frequent 'Suburb' and 'Regionname' values among the price-filtered
# rows (value_counts orders them most frequent first), together with their
# labels as plain Python lists.
top_suburbs = affordable_housing['Suburb'].value_counts().iloc[:10]
top_suburbs_list = top_suburbs.index.to_list()
top_regions = affordable_housing['Regionname'].value_counts().iloc[:10]
top_regions_list = top_regions.index.to_list()

print(top_suburbs_list)
# pickle.dump(top_suburbs_list,open("./ref_result/top_suburbs_list.pkl","wb"))

print(top_regions_list)
# pickle.dump(top_regions_list,open("./ref_result/top_regions_list.pkl","wb"))


import pandas as pd   
import pickle

# Restrict the price-filtered rows to those whose 'Distance' is at most 20.
affordable_housing_within_20km = affordable_housing.loc[
    affordable_housing['Distance'].le(20)
]

print(affordable_housing_within_20km)
# pickle.dump(affordable_housing_within_20km,open("./ref_result/affordable_housing_within_20km.pkl","wb"))


import pandas as pd   
import pickle

# Ten most frequent 'Suburb' values among the distance-filtered rows.
top_suburbs_within_20km = (
    affordable_housing_within_20km['Suburb'].value_counts().iloc[:10]
)

# NOTE(review): this prints the filtered frame again, not the counts computed
# just above — confirm that this is the intended output.
print(affordable_housing_within_20km)
# pickle.dump(affordable_housing_within_20km,open("./ref_result/affordable_housing_within_20km.pkl","wb"))


import pandas as pd   
import pickle

# Labels of the top-suburb counts as a plain Python list, in their ranked order.
top_suburbs_within_20km_list = top_suburbs_within_20km.index.to_list()

print(top_suburbs_within_20km_list)
# pickle.dump(top_suburbs_within_20km_list,open("./ref_result/top_suburbs_within_20km_list.pkl","wb"))


import pandas as pd   
import pickle

# Convert the 'Date' column to datetimes, interpreting ambiguous values with
# the day first, and store the result back in the same column.
parsed_sale_dates = pd.to_datetime(melbourne_housing['Date'], dayfirst=True)
melbourne_housing['Date'] = parsed_sale_dates

print(melbourne_housing)
# pickle.dump(melbourne_housing,open("./ref_result/melbourne_housing.pkl","wb"))


import pandas as pd   
import pickle

# Keep the rows dated no earlier than five calendar years before the most
# recent 'Date' in the data.
latest_sale_date = melbourne_housing['Date'].max()
five_year_cutoff = latest_sale_date - pd.DateOffset(years=5)
past_5_years = melbourne_housing.loc[melbourne_housing['Date'] >= five_year_cutoff]

print(past_5_years)
# pickle.dump(past_5_years,open("./ref_result/past_5_years.pkl","wb"))


import pandas as pd   
import pickle

# Apply the same inclusive [300000, 600000] 'Price' band to the recent rows.
affordable_housing_past_5_years = past_5_years.loc[
    past_5_years['Price'].between(300000, 600000)
]

print(affordable_housing_past_5_years)
# pickle.dump(affordable_housing_past_5_years,open("./ref_result/affordable_housing_past_5_years.pkl","wb"))


import pandas as pd   
import pickle

# Rows of the recent, price-filtered frame whose 'Type' is 'h', and their count.
is_house = affordable_housing_past_5_years['Type'].eq('h')
affordable_houses = affordable_housing_past_5_years[is_house]
number_of_affordable_houses = affordable_houses.shape[0]

print(affordable_houses)
# pickle.dump(affordable_houses,open("./ref_result/affordable_houses.pkl","wb"))

print(number_of_affordable_houses)
print(number_of_affordable_houses)
# pickle.dump(number_of_affordable_houses,open("./ref_result/number_of_affordable_houses.pkl","wb"))


import pandas as pd   
import pickle

# Split the recent, price-filtered rows by 'Type' code and count each subset.
listing_type = affordable_housing_past_5_years['Type']

# Townhouses: 'Type' equals 't'.
affordable_townhouses = affordable_housing_past_5_years[listing_type.eq('t')]
number_of_affordable_townhouses = affordable_townhouses.shape[0]

# Units: 'Type' equals 'u'.
affordable_units = affordable_housing_past_5_years[listing_type.eq('u')]
number_of_affordable_units = affordable_units.shape[0]

print(number_of_affordable_townhouses)
# pickle.dump(number_of_affordable_townhouses,open("./ref_result/number_of_affordable_townhouses.pkl","wb"))

print(number_of_affordable_units)
# pickle.dump(number_of_affordable_units,open("./ref_result/number_of_affordable_units.pkl","wb"))


import pandas as pd   
import pickle

# Derive a 'Year' column holding the calendar year of each row's 'Date'.
# assign() builds a new frame and the name is rebound to it, instead of
# writing a column into a frame obtained by boolean filtering — the in-place
# write makes pandas (without Copy-on-Write) emit SettingWithCopyWarning.
affordable_housing_past_5_years = affordable_housing_past_5_years.assign(
    Year=affordable_housing_past_5_years['Date'].dt.year
)

print(affordable_housing_past_5_years)
# pickle.dump(affordable_housing_past_5_years,open("./ref_result/affordable_housing_past_5_years.pkl","wb"))


import pandas as pd   
import pickle

# Per-'Year' aggregates over the recent, price-filtered rows: how many rows
# fall in each year and the mean 'Price' of those rows.
rows_by_year = affordable_housing_past_5_years.groupby('Year')
annual_number_of_affordable_properties = rows_by_year.size()
average_price_of_affordable_properties = rows_by_year['Price'].mean()

print(annual_number_of_affordable_properties)
# pickle.dump(annual_number_of_affordable_properties,open("./ref_result/annual_number_of_affordable_properties.pkl","wb"))

print(average_price_of_affordable_properties)
# pickle.dump(average_price_of_affordable_properties,open("./ref_result/average_price_of_affordable_properties.pkl","wb"))


import pandas as pd   
import pickle

# Change of each yearly figure relative to the previous entry, expressed in
# percent; the first entry has no predecessor and is therefore NaN.
percentage_change_annual_number = (
    annual_number_of_affordable_properties.pct_change().mul(100)
)
percentage_change_average_price = (
    average_price_of_affordable_properties.pct_change().mul(100)
)

print(percentage_change_annual_number)
# pickle.dump(percentage_change_annual_number,open("./ref_result/percentage_change_annual_number.pkl","wb"))

print(percentage_change_average_price)
# pickle.dump(percentage_change_average_price,open("./ref_result/percentage_change_average_price.pkl","wb"))


import pandas as pd   
import pickle

# Gather the four yearly series into one table, one labelled column per series.
metric_columns = {
    'Number of Affordable Properties Sold': annual_number_of_affordable_properties,
    'Average Price': average_price_of_affordable_properties,
    'Percentage Change in Number Sold': percentage_change_annual_number,
    'Percentage Change in Average Price': percentage_change_average_price,
}
annual_metrics = pd.DataFrame(metric_columns)

print(annual_metrics)
# pickle.dump(annual_metrics,open("./ref_result/annual_metrics.pkl","wb"))


import pandas as pd   
import pickle

# One row per ('Suburb', 'Year') pair, with the number of recent,
# price-filtered rows that fall in that pair.
pair_counts = affordable_housing_past_5_years.groupby(['Suburb', 'Year']).size()
suburb_yearly_demand = pair_counts.reset_index(
    name='Number of Affordable Properties Sold'
)

print(suburb_yearly_demand)
# pickle.dump(suburb_yearly_demand,open("./ref_result/suburb_yearly_demand.pkl","wb"))




import pandas as pd   
import pickle

# Reshape the per-pair counts into a table with one row per 'Suburb' and one
# column per 'Year'; pairs that do not occur are filled with 0.
suburb_yearly_demand_pivot = suburb_yearly_demand.pivot_table(
    index='Suburb',
    columns='Year',
    values='Number of Affordable Properties Sold',
    fill_value=0,
)

print(suburb_yearly_demand_pivot)

# Persist the pivot table. The output directory is created when missing so the
# script does not fail on its last step, and the context manager closes the
# file handle as soon as the dump completes.
os.makedirs("./ref_result", exist_ok=True)
with open("./ref_result/suburb_yearly_demand_pivot.pkl", "wb") as pickle_file:
    pickle.dump(suburb_yearly_demand_pivot, pickle_file)
